Show the code
# Load the packages used by the report (each one only once).
library(targets)
library(tidyverse)
library(ggokabeito)
library(easystats)
library(gt)
library(ggfittext)
library(scales)
library(visdat)

# Make theme_minimal() the default ggplot2 theme.
theme_set(theme_minimal())
JSON-Daten wurden nicht importiert, da offenbar nur redundante Daten enthalten sind.
tar_load(data_all_fct)
Der Roh-Datensatz verfügt über
Jede Zeile entspricht einem “Visit”.
# Small preview of the raw data: first 100 columns, first 100 rows.
data_all_fct_head100 <-
  data_all_fct %>%
  select(1:100) %>%
  slice_head(n = 100)

# Visualise the preview with visdat.
data_all_fct_head100 %>%
  visdat::vis_dat()

# Column names of the preview.
data_all_fct_head100 %>%
  names()
[1] "file_id" "idvisit"
[3] "visitip" "visitorid"
[5] "fingerprint" "actiondetails_0_type"
[7] "actiondetails_0_url" "actiondetails_0_pageidaction"
[9] "actiondetails_0_idpageview" "actiondetails_0_servertimepretty"
[11] "actiondetails_0_pageid" "actiondetails_0_eventcategory"
[13] "actiondetails_0_eventaction" "actiondetails_0_timestamp"
[15] "actiondetails_0_title" "actiondetails_0_subtitle"
[17] "actiondetails_1_type" "actiondetails_1_url"
[19] "actiondetails_1_pageidaction" "actiondetails_1_idpageview"
[21] "actiondetails_1_servertimepretty" "actiondetails_1_pageid"
[23] "actiondetails_1_timespent" "actiondetails_1_timespentpretty"
[25] "actiondetails_1_pageviewposition" "actiondetails_1_title"
[27] "actiondetails_1_subtitle" "actiondetails_1_timestamp"
[29] "actiondetails_2_type" "actiondetails_2_url"
[31] "actiondetails_2_pageidaction" "actiondetails_2_idpageview"
[33] "actiondetails_2_servertimepretty" "actiondetails_2_pageid"
[35] "actiondetails_2_eventcategory" "actiondetails_2_eventaction"
[37] "actiondetails_2_pageviewposition" "actiondetails_2_timestamp"
[39] "actiondetails_2_title" "actiondetails_2_subtitle"
[41] "actiondetails_3_type" "actiondetails_3_url"
[43] "actiondetails_3_pageidaction" "actiondetails_3_idpageview"
[45] "actiondetails_3_servertimepretty" "actiondetails_3_pageid"
[47] "actiondetails_3_eventcategory" "actiondetails_3_eventaction"
[49] "actiondetails_3_pageviewposition" "actiondetails_3_timestamp"
[51] "actiondetails_3_title" "actiondetails_3_subtitle"
[53] "actiondetails_4_type" "actiondetails_4_url"
[55] "actiondetails_4_pageidaction" "actiondetails_4_idpageview"
[57] "actiondetails_4_servertimepretty" "actiondetails_4_pageid"
[59] "actiondetails_4_timespent" "actiondetails_4_timespentpretty"
[61] "actiondetails_4_pageviewposition" "actiondetails_4_title"
[63] "actiondetails_4_subtitle" "actiondetails_4_timestamp"
[65] "actiondetails_5_type" "actiondetails_5_url"
[67] "actiondetails_5_pageidaction" "actiondetails_5_idpageview"
[69] "actiondetails_5_servertimepretty" "actiondetails_5_pageid"
[71] "actiondetails_5_eventcategory" "actiondetails_5_eventaction"
[73] "actiondetails_5_pageviewposition" "actiondetails_5_timestamp"
[75] "actiondetails_5_title" "actiondetails_5_subtitle"
[77] "actiondetails_6_type" "actiondetails_6_url"
[79] "actiondetails_6_pageidaction" "actiondetails_6_idpageview"
[81] "actiondetails_6_servertimepretty" "actiondetails_6_pageid"
[83] "actiondetails_6_eventcategory" "actiondetails_6_eventaction"
[85] "actiondetails_6_pageviewposition" "actiondetails_6_timestamp"
[87] "actiondetails_6_title" "actiondetails_6_subtitle"
[89] "actiondetails_7_type" "actiondetails_7_url"
[91] "actiondetails_7_pageidaction" "actiondetails_7_idpageview"
[93] "actiondetails_7_servertimepretty" "actiondetails_7_pageid"
[95] "actiondetails_7_timespent" "actiondetails_7_timespentpretty"
[97] "actiondetails_7_pageviewposition" "actiondetails_7_title"
[99] "actiondetails_7_subtitle" "actiondetails_7_timestamp"
# Compact column-wise overview of the preview.
data_all_fct_head100 %>%
  glimpse()
Rows: 100
Columns: 100
$ file_id <fct> matomo_export_2023-10-04.csv, matomo_…
$ idvisit <fct> 6, 5, 4, 2, 3, 1, 25, 26, 24, 22, 23,…
$ visitip <fct> 141.75.152.0, 141.75.168.0, 141.75.15…
$ visitorid <fct> de3a10060112d977, 92f090e11fdbefbe, d…
$ fingerprint <fct> 379a01acad44b5c7, fb231a0ec5c2f0c8, 3…
$ actiondetails_0_type <fct> event, action, action, action, action…
$ actiondetails_0_url <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_0_pageidaction <fct> 11, 122, 6, 6, 2, 2, 252, 254, 254, 2…
$ actiondetails_0_idpageview <fct> KjY5Mu, tv7t1g, OqlJp6, SqdDri, t8YsD…
$ actiondetails_0_servertimepretty <fct> "Oct 4, 2023 20:27:32", "Oct 4, 2023 …
$ actiondetails_0_pageid <fct> 483, 308, 305, 92, 222, 1, 972, 973, …
$ actiondetails_0_eventcategory <fct> click_button, NA, NA, NA, NA, NA, cli…
$ actiondetails_0_eventaction <fct> Kanäle, NA, NA, NA, NA, NA, Kanäle, N…
$ actiondetails_0_timestamp <fct> 2023-10-04 20:27:32, 2023-10-04 19:45…
$ actiondetails_0_title <fct> Event, HAnS, HAnS, HAnS, HAnS, HAnS, …
$ actiondetails_0_subtitle <fct> "Category: \"\"click_button', Action:…
$ actiondetails_1_type <fct> action, event, event, event, NA, even…
$ actiondetails_1_url <fct> https://hans.th-nuernberg.de/?evalId=…
$ actiondetails_1_pageidaction <fct> 6, 123, 7, 7, NA, 3, 251, NA, 256, 2,…
$ actiondetails_1_idpageview <fct> awdDAu, tv7t1g, OqlJp6, SqdDri, NA, J…
$ actiondetails_1_servertimepretty <fct> "Oct 4, 2023 20:27:33", "Oct 4, 2023 …
$ actiondetails_1_pageid <fct> 484, 309, 306, 93, NA, 2, 974, NA, 97…
$ actiondetails_1_timespent <fct> 22, NA, NA, NA, NA, NA, 23, NA, NA, 6…
$ actiondetails_1_timespentpretty <fct> 22s, NA, NA, NA, NA, NA, 23s, NA, NA,…
$ actiondetails_1_pageviewposition <fct> 1, 1, 1, 1, NA, 1, 1, NA, 1, 2, 1, NA…
$ actiondetails_1_title <fct> HAnS, Event, Event, Event, NA, Event,…
$ actiondetails_1_subtitle <fct> "https://hans.th-nuernberg.de/?evalId…
$ actiondetails_1_timestamp <fct> 2023-10-04 20:27:33, 2023-10-04 19:45…
$ actiondetails_2_type <fct> event, event, action, action, NA, act…
$ actiondetails_2_url <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_2_pageidaction <fct> 11, 123, 10, 10, NA, 6, 256, NA, 251,…
$ actiondetails_2_idpageview <fct> KjY5Mu, tv7t1g, KjY5Mu, rU2DPV, NA, h…
$ actiondetails_2_servertimepretty <fct> "Oct 4, 2023 20:27:33", "Oct 4, 2023 …
$ actiondetails_2_pageid <fct> 485, 310, 307, 94, NA, 3, 975, NA, 97…
$ actiondetails_2_eventcategory <fct> click_button, click_button, NA, NA, N…
$ actiondetails_2_eventaction <fct> Medien, Abmelden, NA, NA, NA, NA, Med…
$ actiondetails_2_pageviewposition <fct> 1, 1, 2, 2, NA, 2, NA, NA, 2, 3, 1, N…
$ actiondetails_2_timestamp <fct> 2023-10-04 20:27:33, 2023-10-04 19:45…
$ actiondetails_2_title <fct> Event, Event, HAnS, HAnS, NA, HAnS, E…
$ actiondetails_2_subtitle <fct> "Category: \"\"click_button', Action:…
$ actiondetails_3_type <fct> event, action, NA, event, NA, event, …
$ actiondetails_3_url <fct> https://hans.th-nuernberg.de/?evalId=…
$ actiondetails_3_pageidaction <fct> 7, 2, NA, 11, NA, 7, 252, NA, NA, NA,…
$ actiondetails_3_idpageview <fct> awdDAu, UTm1cZ, NA, rU2DPV, NA, hZ9df…
$ actiondetails_3_servertimepretty <fct> "Oct 4, 2023 20:27:44", "Oct 4, 2023 …
$ actiondetails_3_pageid <fct> 486, 311, NA, 95, NA, 4, 976, NA, NA,…
$ actiondetails_3_eventcategory <fct> click_button, NA, NA, click_channelca…
$ actiondetails_3_eventaction <fct> Kanäle, NA, NA, GDI, NA, Kanäle, Kanä…
$ actiondetails_3_pageviewposition <fct> 1, 2, NA, 2, NA, 2, 1, NA, NA, 4, 2, …
$ actiondetails_3_timestamp <fct> 2023-10-04 20:27:44, 2023-10-04 19:45…
$ actiondetails_3_title <fct> Event, HAnS, NA, Event, NA, Event, Ev…
$ actiondetails_3_subtitle <fct> "Category: \"\"click_button', Action:…
$ actiondetails_4_type <fct> action, event, NA, search, NA, action…
$ actiondetails_4_url <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_4_pageidaction <fct> 10, 3, NA, NA, NA, 10, 254, NA, NA, 2…
$ actiondetails_4_idpageview <fct> ItPeDS, UTm1cZ, NA, oP3co8, NA, YNDCa…
$ actiondetails_4_servertimepretty <fct> "Oct 4, 2023 20:27:44", "Oct 4, 2023 …
$ actiondetails_4_pageid <fct> 487, 312, NA, 96, NA, 5, 977, NA, NA,…
$ actiondetails_4_timespent <fct> 1275, NA, NA, NA, NA, 174, 1433, NA, …
$ actiondetails_4_timespentpretty <fct> 21 min 15s, NA, NA, NA, NA, 2 min 54s…
$ actiondetails_4_pageviewposition <fct> 2, 2, NA, 3, NA, 3, 2, NA, NA, 4, 3, …
$ actiondetails_4_title <fct> HAnS, Event, NA, Site Search, NA, HAn…
$ actiondetails_4_subtitle <fct> "https://hans.th-nuernberg.de/channel…
$ actiondetails_4_timestamp <fct> 2023-10-04 20:27:44, 2023-10-04 19:47…
$ actiondetails_5_type <fct> event, action, NA, action, NA, event,…
$ actiondetails_5_url <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_5_pageidaction <fct> 11, 6, NA, 16, NA, 11, 254, NA, NA, 2…
$ actiondetails_5_idpageview <fct> ItPeDS, kvsynp, NA, oP3co8, NA, YNDCa…
$ actiondetails_5_servertimepretty <fct> "Oct 4, 2023 20:48:58", "Oct 4, 2023 …
$ actiondetails_5_pageid <fct> 563, 313, NA, 97, NA, 6, 978, NA, NA,…
$ actiondetails_5_eventcategory <fct> click_button, NA, NA, NA, NA, click_b…
$ actiondetails_5_eventaction <fct> Kanäle, NA, NA, NA, NA, Medien, NA, N…
$ actiondetails_5_pageviewposition <fct> 2, 3, NA, 3, NA, 3, 3, NA, NA, 6, 3, …
$ actiondetails_5_timestamp <fct> 2023-10-04 20:48:58, 2023-10-04 19:47…
$ actiondetails_5_title <fct> Event, HAnS, NA, HAnS, NA, Event, HAn…
$ actiondetails_5_subtitle <fct> "Category: \"\"click_button', Action:…
$ actiondetails_6_type <fct> event, search, NA, event, NA, action,…
$ actiondetails_6_url <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_6_pageidaction <fct> 11, NA, NA, 17, NA, 6, 256, NA, NA, N…
$ actiondetails_6_idpageview <fct> ItPeDS, rM5GmP, NA, oP3co8, NA, IT1VG…
$ actiondetails_6_servertimepretty <fct> "Oct 4, 2023 20:48:59", "Oct 4, 2023 …
$ actiondetails_6_pageid <fct> 564, 314, NA, 98, NA, 7, 979, NA, NA,…
$ actiondetails_6_eventcategory <fct> click_button, NA, NA, click_videocard…
$ actiondetails_6_eventaction <fct> Medien, NA, NA, Kapitel9-5-Maschinens…
$ actiondetails_6_pageviewposition <fct> 2, 4, NA, 4, NA, 4, 3, NA, NA, 7, 3, …
$ actiondetails_6_timestamp <fct> 2023-10-04 20:48:59, 2023-10-04 19:47…
$ actiondetails_6_title <fct> Event, Site Search, NA, Event, NA, HA…
$ actiondetails_6_subtitle <fct> "Category: \"\"click_button', Action:…
$ actiondetails_7_type <fct> action, action, NA, action, NA, event…
$ actiondetails_7_url <fct> https://hans.th-nuernberg.de/?evalId=…
$ actiondetails_7_pageidaction <fct> 6, 16, NA, 58, NA, 7, 255, NA, NA, NA…
$ actiondetails_7_idpageview <fct> Zaut9i, rM5GmP, NA, iyzwzi, NA, IT1VG…
$ actiondetails_7_servertimepretty <fct> "Oct 4, 2023 20:48:59", "Oct 4, 2023 …
$ actiondetails_7_pageid <fct> 565, 315, NA, 99, NA, 8, 980, NA, NA,…
$ actiondetails_7_timespent <fct> 1, 22, NA, 16, NA, NA, 112, NA, NA, N…
$ actiondetails_7_timespentpretty <fct> 1s, 22s, NA, 16s, NA, NA, 1 min 52s, …
$ actiondetails_7_pageviewposition <fct> 3, 4, NA, 5, NA, 4, 4, NA, NA, 8, 3, …
$ actiondetails_7_title <fct> HAnS, HAnS, NA, HAnS, NA, Event, HAnS…
$ actiondetails_7_subtitle <fct> "https://hans.th-nuernberg.de/?evalId…
$ actiondetails_7_timestamp <fct> 2023-10-04 20:48:59, 2023-10-04 19:47…
# Load the long-format data and show its first 100 rows as a table.
tar_load(data_slim)
data_slim |>
  slice(1:100) |>
  gt()
| nr | type | value | idvisit |
|---|---|---|---|
| 0 | type | action | 1 |
| 0 | url | https://hans.th-nuernberg.de/login?evalId=none&role=undefined | 1 |
| 0 | timestamp | 2023-10-04 16:19:46 | 1 |
| 0 | title | HAnS | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&role=undefined | 1 |
| 0 | pageloadtime | 0.18s | 1 |
| 0 | pageloadtimemilliseconds | 175 | 1 |
| 1 | type | event | 1 |
| 1 | url | https://hans.th-nuernberg.de/login?evalId=none&role=undefined | 1 |
| 1 | title | Event | 1 |
| 1 | subtitle | Category: ""login', Action: ""success"" | 1 |
| 1 | timestamp | 2023-10-04 16:19:54 | 1 |
| 1 | eventcategory | login | 1 |
| 1 | eventaction | success | 1 |
| 2 | type | action | 1 |
| 2 | url | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 2 | timestamp | 2023-10-04 16:19:54 | 1 |
| 2 | title | HAnS | 1 |
| 2 | subtitle | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 3 | type | event | 1 |
| 3 | url | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 3 | eventcategory | click_button | 1 |
| 3 | eventaction | Kanäle | 1 |
| 3 | timestamp | 2023-10-04 16:19:56 | 1 |
| 3 | title | Event | 1 |
| 3 | subtitle | Category: ""click_button', Action: ""Kanäle"" | 1 |
| 4 | type | action | 1 |
| 4 | url | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 4 | title | HAnS | 1 |
| 4 | subtitle | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 4 | timestamp | 2023-10-04 16:19:56 | 1 |
| 5 | type | event | 1 |
| 5 | url | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 5 | eventcategory | click_button | 1 |
| 5 | eventaction | Medien | 1 |
| 5 | timestamp | 2023-10-04 16:21:23 | 1 |
| 5 | title | Event | 1 |
| 5 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 6 | type | action | 1 |
| 6 | url | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 6 | timestamp | 2023-10-04 16:21:23 | 1 |
| 6 | title | HAnS | 1 |
| 6 | subtitle | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 7 | type | event | 1 |
| 7 | url | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 7 | title | Event | 1 |
| 7 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 7 | timestamp | 2023-10-04 16:25:22 | 1 |
| 7 | eventcategory | click_button | 1 |
| 7 | eventaction | Medien | 1 |
| 8 | type | event | 1 |
| 8 | url | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 8 | title | Event | 1 |
| 8 | subtitle | Category: ""click_button', Action: ""Kanäle"" | 1 |
| 8 | timestamp | 2023-10-04 16:25:23 | 1 |
| 8 | eventcategory | click_button | 1 |
| 8 | eventaction | Kanäle | 1 |
| 9 | type | action | 1 |
| 9 | url | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 9 | timestamp | 2023-10-04 16:25:23 | 1 |
| 9 | title | HAnS | 1 |
| 9 | subtitle | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 10 | type | action | 1 |
| 10 | url | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 10 | title | HAnS | 1 |
| 10 | subtitle | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 10 | timestamp | 2023-10-04 16:25:24 | 1 |
| 11 | type | event | 1 |
| 11 | url | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 11 | eventcategory | click_button | 1 |
| 11 | eventaction | Medien | 1 |
| 11 | timestamp | 2023-10-04 16:25:24 | 1 |
| 11 | title | Event | 1 |
| 11 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 12 | type | action | 1 |
| 12 | url | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 12 | title | HAnS | 1 |
| 12 | subtitle | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 12 | timestamp | 2023-10-04 16:25:26 | 1 |
| 13 | type | event | 1 |
| 13 | url | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 13 | title | Event | 1 |
| 13 | subtitle | Category: ""click_button', Action: ""Kanäle"" | 1 |
| 13 | timestamp | 2023-10-04 16:25:26 | 1 |
| 13 | eventcategory | click_button | 1 |
| 13 | eventaction | Kanäle | 1 |
| 14 | type | event | 1 |
| 14 | url | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 14 | eventcategory | click_channelcard | 1 |
| 14 | eventaction | GESOA | 1 |
| 14 | timestamp | 2023-10-04 16:25:27 | 1 |
| 14 | title | Event | 1 |
| 14 | subtitle | Category: ""click_channelcard', Action: ""GESOA"" | 1 |
| 15 | type | search | 1 |
| 15 | title | Site Search | 1 |
| 15 | subtitle | GESOA | 1 |
| 15 | timestamp | 2023-10-04 16:25:27 | 1 |
| 15 | sitesearchkeyword | GESOA | 1 |
| 15 | sitesearchcount | 0 | 1 |
| 16 | type | action | 1 |
Entfernt man Developer, Admins und Lecturers aus dem Roh-Datensatz so bleiben weniger Zeilen übrig:
# Load further pipeline targets used below.
tar_load(data_users_only)
tar_load(count_action)
tar_load(config)
Laut config.yaml ist das aktuelle Semester, d.h. 24-ss.
# Overall earliest and latest timestamp across all rows of time_minmax.
tar_load(time_minmax)
time_minmax |>
  summarise(
    time_min = min(time_min),
    time_max = max(time_max)
  ) |>
  gt()
| time_min | time_max |
|---|---|
| 2023-10-04 16:19:46 | 2023-10-09 22:21:28 |
Diese Statistik wurde auf Basis des Datenobjekts data_slim berechnet.
tar_load(time_since_last_visit)

# Convert via character so that a factor column yields the recorded values
# instead of its internal level codes (as.numeric() on a factor returns codes).
# NOTE(review): the printed Min = 1 / Max = 2 looks like level codes of a
# two-level factor -- confirm the column type of dayssincelastvisit.
time_since_last_visit <-
  time_since_last_visit |>
  mutate(dayssincelastvisit = as.numeric(as.character(dayssincelastvisit)))

# Descriptive statistics of the days since the last visit.
time_since_last_visit |>
  datawizard::describe_distribution(dayssincelastvisit) |>
  knitr::kable()
| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| dayssincelastvisit | 1.0125 | 0.1114513 | 0 | 1 | 2 | 8.858956 | 77.44913 | 160 | 0 |
# Density plot of the days since the last visit.
time_since_last_visit |>
  ggplot(aes(x = dayssincelastvisit)) +
  geom_density()
Die folgenden Statistiken beruhen auf dem Datensatz data_slim:
# Compact column-wise overview of data_slim.
glimpse(data_slim)
Rows: 19,958
Columns: 4
$ nr <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3,…
$ type <fct> type, url, timestamp, title, subtitle, pageloadtime, pageloadt…
$ value <chr> "action", "https://hans.th-nuernberg.de/login?evalId=none&role…
$ idvisit <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
nr fasst die Nummer der Aktion innerhalb eines bestimmten Visits.
# Descriptive statistics of nr_max, formatted with two decimals.
count_action |>
  describe_distribution(nr_max) |>
  gt() |>
  fmt_number(
    columns = where(is.numeric),
    decimals = 2
  )
| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| nr_max | 33.94 | 48.36 | 46.50 | 1.00 | 211.00 | 1.97 | 3.46 | 86.00 | 0.00 |
nr_max gibt den Maximalwert von nr zurück, sagt also, wie viele Aktionen maximal von einem Visitor ausgeführt wurden.
Betrachtet man die Anzahl der Aktionen pro Visitor näher, so fällt auf, dass der Maximalwert (499) sehr häufig vorkommt:
# Frequency of each nr_max value, drawn as columns.
count_action |>
  count(nr_max) |>
  ggplot(aes(x = nr_max, y = n)) +
  geom_col()
Hier noch in einer anderen Darstellung:
# Same frequencies as above, drawn as points.
count_action |>
  count(nr_max) |>
  ggplot(aes(x = nr_max, y = n)) +
  geom_point()
Der Maximalwert ist einfach auffällig häufig:
# How many rows have nr_max equal to 499, and how many do not.
count_action |>
  count(nr_max == 499) |>
  gt()
| nr_max == 499 | n |
|---|---|
| FALSE | 86 |
Es erscheint plausibel, dass der Maximalwert alle “gekappten” (zensierten, abgeschnittenen) Werte fasst, also viele Werte, die eigentlich größer wären (aber dann zensiert wurden).
# Drop the rows whose nr_max equals 499 (rows with missing nr_max are dropped
# as well, because filter() only keeps rows where the condition is TRUE).
count_action2 <-
  count_action |>
  filter(nr_max != 499)

# Descriptive statistics of nr_max without those rows.
count_action2 |>
  describe_distribution(nr_max) |>
  gt() |>
  fmt_number(
    columns = where(is.numeric),
    decimals = 2
  )
| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| nr_max | 33.94 | 48.36 | 46.50 | 1.00 | 211.00 | 1.97 | 3.46 | 86.00 | 0.00 |
# Mean and SD of nr_max over all rows of count_action.
count_action_avg <- mean(count_action$nr_max)
count_action_sd <- sd(count_action$nr_max)

# Histogram of nr_max with the mean as a vertical line and mean +/- 1 SD as a
# horizontal segment on the x axis.
# NOTE(review): the "MW" label sits at the hard-coded y = 1500; with small
# counts per bin this stretches the y axis -- confirm that this is intended.
count_action |>
  ggplot() +
  geom_histogram(aes(x = nr_max)) +
  labs(
    x = "Anzahl von Aktionen pro Visit",
    y = "n",
    caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD"
  ) +
  theme_minimal() +
  geom_vline(
    xintercept = count_action_avg,
    color = palette_okabe_ito()[1]
  ) +
  geom_segment(
    x = count_action_avg - count_action_sd,
    y = 0,
    xend = count_action_avg + count_action_sd,
    yend = 0,
    color = palette_okabe_ito()[2],
    size = 2
  ) +
  annotate("label", x = count_action_avg, y = 1500, label = "MW") +
  annotate(
    "label",
    x = count_action_avg + count_action_sd,
    y = 0,
    label = "SD"
  )
# geom_label(aes(x = count_action_avg), y = 1, label = "Mean")

# Mean and SD of nr_max for count_action2.
count_action_avg2 <- mean(count_action2$nr_max)
count_action_sd2 <- sd(count_action2$nr_max)

# Same plot for count_action2. The SD segment is centred on the mean of
# count_action2 (count_action_avg2), not on the mean of the full data.
count_action2 |>
  ggplot() +
  geom_histogram(aes(x = nr_max)) +
  labs(
    x = "Anzahl von Aktionen pro Visit",
    y = "n",
    title = "Verteilung der User-Aktionen pro Visit",
    caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD"
  ) +
  theme_minimal() +
  geom_vline(
    xintercept = count_action_avg2,
    color = palette_okabe_ito()[1]
  ) +
  geom_segment(
    x = count_action_avg2 - count_action_sd2,
    y = 0,
    xend = count_action_avg2 + count_action_sd2,
    yend = 0,
    color = palette_okabe_ito()[2],
    size = 2
  ) +
  annotate(
    "label",
    x = count_action_avg2,
    y = 1500,
    label = "MW",
    vjust = "top"
  ) +
  annotate(
    "label",
    x = count_action_avg2 + count_action_sd2,
    y = 0,
    label = "SD",
    vjust = "bottom"
  )
# geom_label(aes(x = count_action_avg), y = 1, label = "Mean")
Die Visit-Zeit wurde auf 600 Min. trunkiert/begrenzt.
tar_load(time_spent)
tar_load(time_duration)

# Express time_diff in minutes and keep only rows below 600 minutes.
time_spent <-
  time_spent |>
  mutate(t_min = as.numeric(time_diff, units = "mins")) |>
  filter(t_min < 600)

# Two-stage summary: statistics of time_diff first, then the mean of those
# statistics.
# NOTE(review): the second summarise() only changes anything if time_spent is
# grouped (e.g. per visit) -- confirm the grouping of the target.
time_spent |>
  summarise(
    mean_time_diff = round(mean(time_diff), 2),
    sd_time_diff = sd(time_diff),
    min_time_diff = min(time_diff),
    max_time_diff = max(time_diff)
  ) |>
  summarise(
    mean_time_diff_avg = mean(mean_time_diff),
    sd_time_diff_avg = mean(sd_time_diff, na.rm = TRUE),
    min_time_diff_avg = mean(min_time_diff),
    max_time_diff_avg = mean(max_time_diff)
  ) |>
  gt() |>
  fmt_number(
    columns = everything(),
    decimals = 2
  )
| mean_time_diff_avg | sd_time_diff_avg | min_time_diff_avg | max_time_diff_avg |
|---|---|---|---|
| 169.23 | 0.00 | 169.23 | 169.23 |
# Average visit duration in seconds and, derived from it, in minutes.
tar_load(time_duration)
time_duration |>
  summarise(duration_sec_avg = mean(visitduration_sec, na.rm = TRUE)) |>
  mutate(duration_min_avg = duration_sec_avg / 60)
 duration_sec_avg duration_min_avg
1 961.3875 16.02313
# Same two-stage summary as for time_diff, here on the minutes column t_min.
time_spent |>
  summarise(
    mean_t_min = mean(t_min),
    sd_t_min = sd(t_min),
    min_t_min = min(t_min),
    max_t_min = max(t_min)
  ) |>
  summarise(
    mean_t_min_avg = mean(mean_t_min),
    sd_t_min_avg = mean(sd_t_min, na.rm = TRUE),
    min_t_min_avg = mean(min_t_min),
    max_t_min_avg = mean(max_t_min)
  ) |>
  gt() |>
  fmt_number(
    columns = everything(),
    decimals = 2
  )
| mean_t_min_avg | sd_t_min_avg | min_t_min_avg | max_t_min_avg |
|---|---|---|---|
| 169.23 | 0.00 | 169.23 | 169.23 |
# Histogram of the dwell time on a clock-style axis. scale_x_time() reads
# plain numbers as seconds, so the minutes in t_min are converted to seconds
# before plotting; the axis then shows hh:mm:ss.
time_spent |>
  ggplot(aes(x = t_min * 60)) +
  geom_histogram() +
  scale_x_time() +
  theme_minimal() +
  labs(
    y = "n",
    x = "Verweildauer in HaNS pro Visit (hh:mm:ss)"
  )

# Histogram of the dwell time in minutes, 5-minute bins.
time_spent |>
  ggplot(aes(x = t_min)) +
  geom_histogram(binwidth = 5) +
  theme_minimal() +
  labs(
    y = "n",
    x = "Verweildauer in Minuten",
    title = "Verweildauer in HaNS pro Visit",
    caption = "binwidth = 5 Min."
  )

# Restrict to dwell times strictly between 1 and 120 minutes.
time_spent2 <-
  time_spent |>
  filter(t_min > 1, t_min < 120)

# Histogram of the restricted dwell times, 10-minute bins.
time_spent2 |>
  ggplot(aes(x = t_min)) +
  geom_histogram(binwidth = 10) +
  theme_minimal() +
  labs(
    y = "n",
    x = "Verweildauer in HaNS pro Visit in Minuten",
    title = "Verweildauer begrenzt auf 1-120 Minuten",
    caption = "binwidth = 10 Min."
  )

tar_load(count_action_type)

# Number and share of rows per action category, most frequent first.
count_action_type |>
  count(category, sort = TRUE) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
| category | n | prop |
|---|---|---|
| video | 1895 | 0.63 |
| visit_page | 417 | 0.14 |
| click_slideChange | 396 | 0.13 |
| NA | 86 | 0.03 |
| click_topic | 51 | 0.02 |
| login | 49 | 0.02 |
| Search Results Count | 43 | 0.01 |
| in_media_search | 26 | 0.01 |
| Medien | 23 | 0.01 |
| GESOA | 9 | 0.00 |
| Kanäle | 9 | 0.00 |
| click_channelcard | 1 | 0.00 |
eventcategory
Was machen die Visitors eigentlich? Und wie oft?
# Frequency of each event category, most frequent first.
data_slim |>
  filter(type == "eventcategory") |>
  count(value, sort = TRUE) |>
  gt()
| value | n |
|---|---|
| videoplayer_click | 1719 |
| click_slideChange | 396 |
| click_button | 50 |
| login | 49 |
| click_start_resize | 33 |
| click_stop_resize | 33 |
| click_videocard | 31 |
| click_topic_details | 30 |
| click_topic_position_using_image | 20 |
| click_transcript_word | 20 |
| in_media_search | 18 |
| logout | 10 |
| click_videocard_search_lecturer | 9 |
| in_media_search_results | 5 |
| click_channelcard | 4 |
| eval | 4 |
| click_in_media_search_results | 3 |
| click_toggle | 2 |
| click_videocard_search_course_acronym | 2 |
| click_topic_position_using_link | 1 |
| userRole | 1 |
# Bar chart of the number of rows per action category, ordered by count.
# The x axis carries the count, so it is labelled as such.
count_action_type |>
  count(category, sort = TRUE) |>
  ggplot(aes(y = reorder(category, n), x = n)) +
  geom_col() +
  geom_bar_text() +
  labs(
    x = "Anzahl der User-Aktionen",
    y = "Aktion",
    title = "Anzahl der User-Aktionen nach Kategorie"
  ) +
  theme_minimal() +
  scale_x_continuous(labels = scales::comma)

# Same chart on a log10 x axis.
count_action_type |>
  count(category, sort = TRUE) |>
  ggplot(aes(y = reorder(category, n), x = n)) +
  geom_col() +
  geom_bar_text() +
  labs(
    x = "Anzahl der User-Aktionen",
    y = "Aktion",
    title = "Anzahl der User-Aktionen nach Kategorie",
    caption = "Log10-Skala"
  ) +
  theme_minimal() +
  scale_x_log10()

tar_load(time_visit_wday)

# Define a vector with the names of the days of the week
# Note: Adjust the start of the week (Sunday or Monday) as per your requirement
# NOTE(review): the lookup below assumes dow is an index 1..7 with 1 = Monday;
# lubridate::wday() defaults to 1 = Sunday -- confirm how dow was computed.
days_of_week <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)

# Replace numbers with day names
time_visit_wday$dow2 <- factor(
  days_of_week[time_visit_wday$dow],
  levels = days_of_week
)

# Share of rows per hour of the day.
time_visit_wday |>
  as_tibble() |>
  count(hour) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "HaNS-Nutzer sind keine Frühaufsteher",
    x = "Uhrzeit",
    y = "Anteil"
  ) # coord_polar()

# Same shares in polar coordinates.
time_visit_wday |>
  as_tibble() |>
  count(hour) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  theme_minimal() +
  coord_polar()

# Share of rows per weekday.
time_visit_wday |>
  as_tibble() |>
  count(dow2) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = dow2, y = prop)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Verteilung der HaNS-Logins nach Wochentagen",
    x = "Wochentag",
    y = "Anteil"
  ) # coord_polar()

# Same shares in polar coordinates.
time_visit_wday |>
  as_tibble() |>
  count(dow2) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = dow2, y = prop)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Verteilung der HaNS-Logins nach Wochentagen",
    x = "Wochentag",
    y = "Anteil"
  ) +
  coord_polar()

# Share of rows per hour within each weekday, one panel per weekday.
# The x axis shows the hour; the weekday is the facet.
time_visit_wday |>
  as_tibble() |>
  count(dow2, hour) |>
  group_by(dow2) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  facet_wrap(~ dow2) +
  theme_minimal() +
  labs(
    title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
    x = "Uhrzeit",
    y = "Anteil"
  ) # coord_polar()

# Same panels in polar coordinates.
time_visit_wday |>
  as_tibble() |>
  count(dow2, hour) |>
  group_by(dow2) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  facet_wrap(~ dow2) +
  theme_minimal() +
  labs(
    title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
    x = "Uhrzeit",
    y = "Anteil"
  ) +
  coord_polar()

# Add a calendar date column derived from date_time.
time2 <-
  time_visit_wday |>
  ungroup() |>
  mutate(date = as.Date(date_time))
# Heatmap of row counts per calendar day and hour.
time2 |>
  ggplot(aes(x = date, y = hour)) +
  geom_bin2d(binwidth = c(1, 1)) + # (1 day, 1 hour)
  scale_x_date(date_breaks = "1 month") +
  theme(legend.position = "bottom") +
  scale_fill_viridis_c() +
  labs(caption = "Each x-bin maps to one day")

# Heatmap of row counts per week and hour.
time2 |>
  ggplot(aes(x = date, y = hour)) +
  geom_bin2d(binwidth = c(7, 1)) + # 1 week, 1 hour
  scale_x_date(date_breaks = "1 week", date_labels = "%W") +
  theme(legend.position = "bottom") +
  scale_fill_viridis_c() +
  labs(
    x = "Week number in 2023/2024",
    caption = "Each x-bin maps to one week"
  )

# Heatmap of row counts per week and day of week.
time2 |>
  ggplot(aes(x = date, y = dow)) +
  geom_bin2d(binwidth = c(7, 1)) + # 1 week, 1 day of week
  scale_x_date(date_breaks = "1 week", date_labels = "%W") +
  theme(legend.position = "bottom") +
  scale_fill_viridis_c() +
  labs(
    x = "Week number in 2023/2024",
    caption = "Each x-bin maps to one week",
    y = "Day of Week"
  ) +
  scale_y_continuous(breaks = 1:7)

# Event categories containing "llm", with their share of all such events.
# The share is rounded after the division (rounding sum(n) has no effect).
data_slim |>
  filter(type == "eventcategory") |>
  filter(str_detect(value, "llm")) |>
  count(value, sort = TRUE) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
| value | n | prop |
|---|
# Per visit: does any value contain "llm"? Then count visits with/without.
# na.rm = TRUE keeps a visit with missing values and no match at FALSE
# instead of turning it into NA.
data_slim |>
  mutate(has_llm = str_detect(value, "llm")) |>
  group_by(idvisit) |>
  summarise(llm_used_during_visit = any(has_llm, na.rm = TRUE)) |>
  count(llm_used_during_visit) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
| llm_used_during_visit | n | prop |
|---|---|---|
| FALSE | 86 | 1 |
# Number and share of rows with/without LLM use within each month.
# The table stays grouped by year_month when it is passed to gt().
tar_load(idvisit_has_llm)
idvisit_has_llm |>
  count(year_month, uses_llm) |>
  ungroup() |>
  group_by(year_month) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
| uses_llm | n | prop |
|---|---|---|
| 2023-10 | ||
| FALSE | 95 | 1 |
# Monthly share of rows with/without LLM use, one line per uses_llm value.
# The grouping aesthetic is `group`; it is set once for all layers.
idvisit_has_llm |>
  count(year_month, uses_llm) |>
  ungroup() |>
  group_by(year_month) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = year_month, y = prop, color = uses_llm, group = uses_llm)) +
  geom_point() +
  geom_line() +
  labs(
    title = "Visitors, die mit dem LLM interagieren im Zeitverlauf (Anteile)"
  )

# Monthly number of rows with/without LLM use, one line per uses_llm value.
idvisit_has_llm |>
  count(year_month, uses_llm) |>
  ungroup() |>
  ggplot(aes(x = year_month, y = n, color = uses_llm, group = uses_llm)) +
  geom_point() +
  geom_line() +
  labs(
    title = "Visitors, die mit dem LLM interagieren im Zeitverlauf (Anzahl)"
  )

tar_load(data_slim)

# Among non-empty subtitle values: how many mention "click_transcript_word"?
data_slim |>
  filter(type == "subtitle") |>
  filter(!is.na(value) & value != "") |>
  count(click_transcript_word = str_detect(value, "click_transcript_word")) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
| click_transcript_word | n | prop |
|---|---|---|
| FALSE | 2985 | 0.99 |
| TRUE | 20 | 0.01 |
tar_load(data_long)

# Frequency of every distinct value that contains "transcript".
ai_actions_count <-
  data_long |>
  filter(str_detect(value, "transcript")) |>
  count(value)

ai_actions_count |>
  gt()
| value | n |
|---|---|
| Category: ""click_transcript_word', Action: ""word: Bei - pos: 346.3 - index: 765"" | 1 |
| Category: ""click_transcript_word', Action: ""word: Diesen - pos: 453.9 - index: 1009"" | 1 |
| Category: ""click_transcript_word', Action: ""word: Fall - pos: 540.22 - index: 1233"" | 1 |
| Category: ""click_transcript_word', Action: ""word: Ja, - pos: 426.74 - index: 944"" | 1 |
| Category: ""click_transcript_word', Action: ""word: Mayer - pos: 1140.58 - index: 2628"" | 2 |
| Category: ""click_transcript_word', Action: ""word: Professionen - pos: 356.74 - index: 797"" | 1 |
| Category: ""click_transcript_word', Action: ""word: Promotionsrecht, - pos: 823.28 - index: 1864"" | 1 |
| Category: ""click_transcript_word', Action: ""word: Schauen - pos: 498.2 - index: 1124"" | 1 |
| Category: ""click_transcript_word', Action: ""word: Und - pos: 979.54 - index: 2229"" | 1 |
| Category: ""click_transcript_word', Action: ""word: Wenn - pos: 330.04 - index: 731"" | 1 |
| Category: ""click_transcript_word', Action: ""word: Wir - pos: 659.98 - index: 1501"" | 1 |
| Category: ""click_transcript_word', Action: ""word: die - pos: 1731.44 - index: 3895"" | 1 |
| Category: ""click_transcript_word', Action: ""word: gibt - pos: 473.12 - index: 1064"" | 1 |
| Category: ""click_transcript_word', Action: ""word: hat, - pos: 382.66 - index: 850"" | 1 |
| Category: ""click_transcript_word', Action: ""word: keinerlei - pos: 1740.08 - index: 3917"" | 1 |
| Category: ""click_transcript_word', Action: ""word: kleinen - pos: 972.18 - index: 2213"" | 1 |
| Category: ""click_transcript_word', Action: ""word: macht - pos: 126.54 - index: 260"" | 1 |
| Category: ""click_transcript_word', Action: ""word: sind - pos: 1098.82 - index: 2536"" | 1 |
| Category: ""click_transcript_word', Action: ""word: wissenschaftlichen - pos: 415.98 - index: 920"" | 1 |
| click_transcript_word | 20 |
# Collapse every value that mentions click_transcript_word into one label and
# add up the occurrences. ai_actions_count is already aggregated, so the
# existing counts are summed via `wt = n`; a plain count() would only count
# the number of distinct values.
# NOTE(review): the matched values appear to include both subtitle-style and
# eventcategory-style entries, so a single click may be represented twice --
# confirm against data_long before reporting this total.
ai_actions_count |>
  mutate(value = case_when(
    str_detect(value, "click_transcript_word.*") ~ "click transcript word",
    TRUE ~ value
  )) |>
  count(value, wt = n, sort = TRUE) |>
  gt()
| value | n |
|---|---|
| click transcript word | 20 |
# Number and share of rows with/without transcript clicks within each month.
# The table stays grouped by year_month when it is passed to gt().
tar_load(ai_transcript_clicks_per_month)
ai_transcript_clicks_per_month |>
  count(year_month, clicks_transcript_any) |>
  ungroup() |>
  group_by(year_month) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
| clicks_transcript_any | n | prop |
|---|---|---|
| 2023-10 | ||
| FALSE | 87 | 0.92 |
| TRUE | 8 | 0.08 |